import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
df = pd.read_csv('log.csv', parse_dates=['Joined', 'Left', 'Time'])
df['Time'] = df['Left'] - df['Joined']
df
min_datetime = df['Joined'].min()
max_datetime = df['Left'].max()
min_date, max_date = min_datetime.date(), max_datetime.date()
unique_members = df['Member Name'].unique().tolist()
if 'Sauron' in unique_members:
unique_members.remove('Sauron')
print(f'Data collected over {max_datetime - min_datetime} with {len(unique_members)} unique members')
start = min_datetime.replace(minute=0, second=0, microsecond=0)
end = max_datetime.replace(minute=0, second=0, microsecond=0) + pd.offsets.Hour(1)
attendance_data = []
date = start
while date < end:
members = df[(df['Joined'] <= date + pd.offsets.Minute(1)) & (df['Left'] >= date)]['Member Name'].to_list()
attendance_data.append([date] + [member in members for member in unique_members])
date += pd.offsets.Minute(1)
att_df = pd.DataFrame(attendance_data, columns=['Date'] + unique_members)
axes = att_df.set_index('Date').rolling(1).mean().plot(figsize=(14, 24), yticks=[0, 1], subplots=True, sharex=True, kind='area')
for ax in axes:
ax.set_yticklabels(['Disconnected', 'Connected'])
plt.subplots_adjust()
plt.show()
def part_of_day(date):
if 0 <= date.hour < 6:
return '00:00 - 05:59'
elif 6 <= date.hour < 12:
return '06:00 - 11:59'
elif 12 <= date.hour < 18:
return '12:00 - 17:59'
else:
return '18:00 - 23:59'
daily_att = att_df.groupby(att_df['Date'].dt.day_name()).sum() / 60
daily_att.index = pd.CategoricalIndex(['Sunday', 'Monday', 'Tuesday', 'Wednessday', 'Thursday', 'Friday', 'Saturday'])
hourly_att = att_df.groupby(att_df['Date'].apply(part_of_day)).sum() / 60
all_att = att_df.sum() / 60
colormap = plt.cm.tab20(np.linspace(0,1,len(unique_members)))
daily_att.plot(use_index=True, kind='bar', figsize=(20, 5), title='Daily Attendance', xlabel='Weekday', rot=0, color=colormap)
plt.show()
hourly_att.plot(use_index=True, kind='bar', figsize=(20, 5), title='Hourly Attendance', xlabel='Time', rot=0, color=colormap)
plt.show()
all_att.sort_values(ascending=False).plot(use_index=True, kind='bar', figsize=(20, 5), title='Total Attendance', xlabel=None, ylabel='Hours', rot=0, color=colormap)
plt.show()
corr = att_df.corr()
corr
plt.figure(figsize=(20, 20))
sns.heatmap(corr, cmap='YlGnBu')
plt.xticks(rotation=0)
plt.show()
plt.figure(figsize=(20, 20))
sns.heatmap(corr[(abs(corr) > 0.3)], cmap='YlGnBu')
plt.xticks(rotation=0)
plt.show()
pd.DataFrame(corr.unstack().sort_values().drop_duplicates())